suppressPackageStartupMessages({
library(tidyverse)
library(ComplexHeatmap)
library(cowplot)
library(ggpubr)
library(readxl)
library(RColorBrewer)
library(MetBrewer)
library(viridis)
library(Hmisc)
library(emmeans)
})
## Warning: package 'Hmisc' was built under R version 4.3.3
path <- "/Users/craposo/Desktop/Github Upload/9_LongtidunalAntiPDL1"
plot.theme <- theme_cowplot() + theme(plot.title = element_text(hjust = 0.5, face = "plain"),
strip.background = element_rect(fill = "transparent"), strip.text = element_text(size = 14)
)
plot.theme.box <- theme_bw() +
theme(plot.title = element_text(hjust = 0.5 , size = 14, color = "black"),
plot.subtitle = element_text(hjust = 0.5 , size = 10, color = "black"),
axis.text = element_text(color = "black", size = 12),
axis.title = element_text(size = 12),
axis.ticks = element_line(color = "black", linewidth = 0.7),
strip.background = element_blank(), strip.text = element_text(hjust = 0.5, size = 14),
panel.border = element_rect(colour = "black", linewidth = 0.7),
panel.grid = element_blank()
)
pop.pal <- c(
"Tcm" = "#65c7ca",
"Tex-KLR" = "#E27E73" ,
"Tex-Int" = "#de311e",
"Tex-Term" = "#b31e0e",
"Tex-Prog" = "#a94d9a",
"Tem_1" = "#f8a523",
"Tem_2" = "#f6d13a"
)
traj_pal <- c(brewer.pal(n = 8, name = "Paired"))
names(traj_pal) <- c(paste0("Traj_", 1:8))
group.pal <- c("PBS" = "#886231", "aPDL1" = "#299093")
# Specify the directory path and get all files in the directory
bTCR_datapath <- paste0(path, "/data/rawdata")
samples <- list.files(path = bTCR_datapath)
# read in all csvs and append sample ID
bTCR_raw_list <- lapply(samples, function(x){
sample.id <- substr(x, 27, 32)
path <- paste0(bTCR_datapath, "/TCRlongtiduinal_Processed_" , sample.id , "_pep.csv")
data <- read.csv(path)
data$Sample.id <- sample.id
return(data)
})
# append all TCR raw data together
bTCR_rawdata <- do.call(rbind, bTCR_raw_list)
# read in sample IDs and append to bTCR_rawdata
sample.id <- read.csv(paste0(path, "/data/SampleInfo.csv")) %>% select(Sample, Sample.id)
bTCR_rawdata <- merge(bTCR_rawdata, sample.id, by = "Sample.id") %>%
mutate(CDR3.nuc.= toupper(CDR3.nuc.)) # make CDR3 all upper case to make futures things easier
bTCR_rawdata %>% head()
## Sample.id CDR3.pep. V VRefBegin VRefEnd VReadBegin VReadEnd
## 1 341724 ASSPGTYEQY mTRBV3*01 120 285 0 165
## 2 341724 ASRNRGINQDTQY mTRBV13-3*01 96 281 4 189
## 3 341724 ASSFGTGGTEVF mTRBV14*01 103 288 12 197
## 4 341724 ASGDCNTEVF mTRBV13-2*04 91 279 14 202
## 5 341724 ASSLSGTSNTEVF mTRBV12-2*01 98 284 7 193
## 6 341724 ASSRQGSTEVF mTRBV12-2*01 103 283 17 197
## D DRefBegin DRefEnd DReadBegin DReadEnd J JRefBegin JRefEnd
## 1 mTRBD1*01 0 6 168 174 mTRBJ2-7*01 4 47
## 2 mTRBD1*01 3 10 191 198 mTRBJ2-5*01 0 49
## 3 mTRBD1*01 0 8 197 205 mTRBJ1-1*01 4 48
## 4 - - - - - mTRBJ1-1*01 2 48
## 5 mTRBD1*01 0 6 197 203 mTRBJ1-1*01 0 48
## 6 mTRBD1*01 2 10 197 205 mTRBJ1-1*01 4 48
## JReadBegin JReadEnd C CRefBegin CRefEnd CReadBegin CReadEnd
## 1 174 217 mTRBC2*03 0 31 217 248
## 2 202 251 mTRBC2*03 0 31 251 282
## 3 208 252 mTRBC2*03 0 31 252 283
## 4 205 251 mTRBC2*03 0 31 251 282
## 5 204 252 mTRBC2*03 0 31 252 283
## 6 207 251 mTRBC2*03 0 31 251 282
## joinedSeq
## 1 cagcagatggagtttctggttaatttctACAATGGTAAAGTCATGGAGAAGTCTAAACTGTTTAAGGATCAGTTTTCAGTTGAAAGACCAGATGGTTCATATTTCACTCTGAAAATCCAACCCACAGCACTGGAGGACTCAGCTGTGTACTTCTGTGCCAGCAGCCCCGGGACATATGAACAGTACTTCGGTCCCGGCACCAGGCTCACGGTTTTAGAGGATCTGAGAAATGTGACTCCacccaaggt
## 2 cttttactggtatcggcaggACACGGGGCACGGGCTGAGGCTGATTCATTACTCATATGTCGCTGACAGCACGGAGAAAGGAGATATCCCTGATGGGTACAAGGCCTCCAGACCAAGCCAAGAGAATTTCTCTCTCATTCTGGAGTTGGCTTCCCTTTCTCAGACAGCTGTATATTTCTGTGCCAGCAGGAACAGGGGAATTAACCAAGACACCCAGTACTTTGGGCCAGGCACTCGGCTCCtcgtgttagaggatctgagaaatgtgactccacccaaggt
## 3 ttccgatcttcgatcagcagcccagagACCAGGGGCCCCAGCTTCTAGTTTACTTTCGGGATGAGGCTGTTATAGATAATTCACAGTTGCCCTCGGATCGATTTTCTGCTGTGAGGCCTAAAGGAACTAACTCCACTCTCAAGATCCAGTCTGCAAAGCAGGGCGACACAGCCACCTATCTCTGTGCCAGCAGTTTCGGGACAGGAGGCACAGAAGTCTTCTTTGGTAAAGGAACCAGACTCACAGTTGTAGAGGATCTGAGAAATGTGACTCCACCCaaggt
## 4 tcttccgatctgggactggtatcggcaggACACGGGGCATGGGCTGAGGCTGATCCATTATTCATATGGTGCTGGCAGCACTGAGAAAGGAGATATCCCTGATGGATACAAGGCCTCCAGACCAAGCCAAGAGAACTTCTCCCTCATTCTGGAGTTGGCTACCCCCTCTCAGACATCAGTGTACTTCTGTGCCAGCGGTGATTGTAACACAGAAGTCTTCTTTGGTAAAGGAACCAGACTCACAGTTGTAGaggatctgagaaatgtgactccacccaaggt
## 5 ttccgatctggtatcaacagactcaggGGCAGGAACTAAAGTTCTTCATTCAGCATTATGATAAAATGGAGAGAGATAAAGGAAACCTGCCCAGCAGATTCTCAGTCCAACAGTTTGATGACTATCACTCTGAGATGAACATGAGTGCCTTGGAGCTAGAGGACTCTGCCGTGTACTTCTGTGCCAGCTCTCTTTCCGGGACATCAaacacagaagtcttctttggtaaaggaaccagactcacagttgtagaggatctgagaaatgtgactccacccaaggt
## 6 tgctcttccgatcttggatcaacagactcaggGGCAGGAACTAAAGTTCTTCATTCAGCATTATGATAAAATGGAGAGAGATAAAGGAAACCTGCCCAGCAGATTCTCAGTCCAACAGTTTGATGACTATCACTCTGAGATGAACATGAGTGCCTTGGAGCTAGAGGACTCTGCCGTGTACTTCTGTGCCAGCTCTCGACAGGGGAGCACAGAAGTCTTCTTTGGTAAAGGAACCAGACTCACAGTTGTAGAGGATCTGAGaaatgtgactccacccaaggt
## CDR3.nuc. copy Sample
## 1 GCCAGCAGCCCCGGGACATATGAACAGTAC 1 C103_d100_aPDL1_cTcm
## 2 GCCAGCAGGAACAGGGGAATTAACCAAGACACCCAGTAC 1 C103_d100_aPDL1_cTcm
## 3 GCCAGCAGTTTCGGGACAGGAGGCACAGAAGTCTTC 8 C103_d100_aPDL1_cTcm
## 4 GCCAGCGGTGATTGTAACACAGAAGTCTTC 11 C103_d100_aPDL1_cTcm
## 5 GCCAGCTCTCTTTCCGGGACATCAAACACAGAAGTCTTC 14 C103_d100_aPDL1_cTcm
## 6 GCCAGCTCTCGACAGGGGAGCACAGAAGTCTTC 1 C103_d100_aPDL1_cTcm
# check number of TCRs per sample
bTCR_rawdata %>% count(Sample) %>% ggplot(aes(x = Sample, y = n)) + geom_col() + scale_y_log10() + rotate_x_text(90)
# select only the cdr3 and count columns
cdr3.df <- bTCR_rawdata %>% dplyr::select(CDR3.nuc. , copy, Sample) %>%
mutate(Rep = ifelse(substr(Sample , nchar(Sample) -1, nchar(Sample)) == "_r", "R2", "R1")) %>% # add replicate column
mutate(Sample = ifelse(Rep == "R2", substr(Sample, 1, nchar(Sample) -2), Sample)) # remove replicate from sample ID
# QC check on replicate samples
## everything looks highly concordant between replicates, the one exception is d21 prog - likely because just so few cells
cdr3.df %>% pivot_wider(id_cols = c(CDR3.nuc. , Sample), values_from = copy, names_from = Rep, values_fn = {sum}) %>%
mutate(across(everything(), ~ ifelse(is.na(.), 0, .))) %>%
ggplot(aes(x = R1, y = R2)) +
geom_point(alpha = .5, size = 0.5) +
geom_smooth(method = "lm", color = "red", fill = "transparent") +
facet_wrap(~Sample, scales = "free") +
stat_cor() + scale_x_log10() + scale_y_log10() + theme_bw() + ggtitle("all clones")
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Transformation introduced infinite values in continuous y-axis
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 218962 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 218962 rows containing non-finite values (`stat_cor()`).
# average clone clounts across replicates
## make vector only where clone is in both replicates
both.rep.clones <- cdr3.df %>% pivot_wider(id_cols = c(CDR3.nuc. , Sample), values_from = copy, names_from = Rep, values_fn = {sum}) %>%
mutate(across(everything(), ~ ifelse(is.na(.), 0, .))) %>%
filter(R1 >0, R2 > 0) %>%
dplyr::select(CDR3.nuc.) %>% c()
## create new clone.df with average reads per sample
clone.df <- cdr3.df %>% filter(Sample != "PosCtrl_m20210506", CDR3.nuc. %in% both.rep.clones$CDR3.nuc.) %>% # remove postive control, select only shared clones
group_by(Rep, Sample, CDR3.nuc.) %>% mutate(ReadFrac = sum (copy)) %>% ungroup() %>% # any TCRs with the same CDR3 add together
group_by(Rep, Sample) %>% mutate(ReadFrac = copy / sum (copy)) %>% # convert copies to fraction of reads
ungroup() %>% group_by(Sample, CDR3.nuc.) %>% dplyr::select(-c(Rep, copy)) %>% summarise(ReadFrac = mean(ReadFrac)) %>% # average read fraction between the two reads
dplyr::rename(TRB_CDR3 = "CDR3.nuc.")
## `summarise()` has grouped output by 'Sample'. You can override using the
## `.groups` argument.
# add in cell numbers from sort - adjust the read fractions to cell #
sampleinfo <- read_xlsx(paste0(path, "/data/CellNumbersSorted.xlsx")) %>%
mutate(Sample = str_replace_all(Sample, " ", "_")) %>% dplyr::select("Sample", "ActualCellNumbner_FlowData", "SortedCellNumber") %>%
dplyr::rename(TotalCellNumber = "ActualCellNumbner_FlowData")
clone.df <- merge(clone.df, sampleinfo, by = "Sample") %>%
mutate(TotalNumber = ReadFrac * TotalCellNumber, SortedNumber = ReadFrac * SortedCellNumber) %>%
ungroup()
# separate DF for all phnotype sorting vs tet sorting
clone.df.pheno <- clone.df %>% filter(Sample != "C103_GP33neg",Sample != "C103_GP33pos") %>%
### mutate sample ID into more useful metadata
separate(Sample, into = c('experiment', 'dpi', 'group', 'sort')) %>%
mutate(TRB_CDR3 = paste(group, TRB_CDR3, sep = "_")) %>%
### TotalNumberClone is number of cells per SPL or per 100uL - this is for determining the clonal behavior
### SortedNumberClone is the number of cells actually sorted - this is for determining how deeply we actually sequenced
group_by(TRB_CDR3, dpi, group) %>% mutate(TotalNumberClone = sum(TotalNumber), SortedNumberClone = sum(SortedNumber)) %>% ungroup() %>%
### CloneFreq is a given clones frequemcy compared to all other clones in the timepoint
group_by(dpi, group) %>% mutate(CloneFreq = TotalNumberClone/sum(TotalNumberClone)) %>% ungroup() %>%
### remove cell number info from sorting data, leave only clones #s
dplyr::select(-c(TotalCellNumber, SortedCellNumber))
# rename cells by new names for celltypes
clone.df.pheno <- clone.df.pheno %>%
mutate(sort = case_when(
sort == "cTcm" ~ "Tcm",
sort == "Prog" ~ "Tex-Prog",
sort == "Term" ~ "Tex-Term",
sort == "Int" & dpi == "d100" ~ "Tem_2",
sort == "KLR" & dpi == "d100" ~ "Tem_1",
sort == "Int" & dpi != "d100" ~ "Tex-Int",
sort == "KLR" & dpi != "d100" ~ "Tex-KLR"
))
# check clones per phenotype
table(clone.df.pheno$sort, clone.df.pheno$dpi, clone.df.pheno$group)
## , , = aPDL1
##
##
## d100 d21 d35
## Tcm 1542 0 0
## Tem_1 1524 0 0
## Tem_2 1032 0 0
## Tex-Int 0 363 989
## Tex-KLR 0 289 785
## Tex-Prog 1002 154 1069
## Tex-Term 485 0 0
##
## , , = Ctrl
##
##
## d100 d21 d35
## Tcm 7552 0 0
## Tem_1 1485 0 0
## Tem_2 980 0 0
## Tex-Int 0 439 867
## Tex-KLR 0 352 776
## Tex-Prog 786 141 178
## Tex-Term 456 0 0
# visualize clone sizes
clone.df.pheno %>% ggplot(aes(x = SortedNumberClone, y = ReadFrac)) +
geom_point(alpha = 0.1) + facet_wrap(group~dpi) +
scale_x_log10() + scale_y_log10() + geom_vline(xintercept = 10, color = "red", linetype = "dashed")
# isolate clones detected at d100 and also at at least one timepoint in the blood
## blood d21 and 35
d21.clones <- clone.df.pheno %>% filter(SortedNumberClone >= 10, dpi == "d21")
d35.clones <- clone.df.pheno %>% filter(SortedNumberClone >= 10, dpi == "d35")
blood.clones <- c(d21.clones$TRB_CDR3, d35.clones$TRB_CDR3) %>% unique()
## spleen d100+
d100.clones <- clone.df.pheno %>% filter(SortedNumberClone >= 10, dpi == "d100")
# set clones of interest as those found in the blood and the spleen
clones.of.interest <- intersect(blood.clones, d100.clones$TRB_CDR3)
# day 21 ==============================================================
clone.freq.d21 <- clone.df.pheno %>%
filter(TRB_CDR3 %in% clones.of.interest) %>%
mutate(freq = (TotalNumber/TotalNumberClone)) %>%
dplyr::select(sort, dpi, TRB_CDR3, freq, CloneFreq) %>%
filter(dpi == "d21")
wide <- clone.freq.d21 %>%
pivot_wider(names_from = sort, values_from = freq) %>%
mutate(across(everything(), ~ ifelse(is.na(.), 0, .))) %>%
filter(TRB_CDR3 %in% clones.of.interest) %>%
separate(TRB_CDR3, into = c("Group", "TRB_CDR3"))
kmeans <- wide %>% select(-c(dpi, TRB_CDR3, Group, CloneFreq, Group)) %>% kmeans(centers = 4)
wide$Cluster <- kmeans$cluster
wide %>% ungroup() %>% select(-c(dpi, TRB_CDR3,Group, CloneFreq, Cluster)) %>%
Heatmap(split = wide$Cluster, col = brewer.pal(9, "Blues"), border = T, column_title = "Day 21")
## Warning: The input is a data frame-like object, convert it to a matrix.
# day 35 ==============================================================
clone.freq.d35 <- clone.df.pheno %>%
filter(TRB_CDR3 %in% clones.of.interest) %>%
mutate(freq = (TotalNumber/TotalNumberClone)) %>%
dplyr::select(sort, dpi, TRB_CDR3, freq, CloneFreq) %>%
filter(dpi == "d35")
wide <- clone.freq.d35 %>%
pivot_wider(names_from = sort, values_from = freq) %>%
mutate(across(everything(), ~ ifelse(is.na(.), 0, .))) %>%
filter(TRB_CDR3 %in% clones.of.interest) %>%
separate(TRB_CDR3, into = c("Group", "TRB_CDR3"))
kmeans <- wide %>% select(-c(dpi, TRB_CDR3, Group, CloneFreq, Group)) %>% kmeans(centers = 4)
wide$Cluster <- kmeans$cluster
wide %>% ungroup() %>% select(-c(dpi, TRB_CDR3,Group, CloneFreq, Cluster)) %>%
Heatmap(split = wide$Cluster, col = brewer.pal(9, "Blues"), border = T, column_title = "Day 35" , name = "freq of clone")
## Warning: The input is a data frame-like object, convert it to a matrix.
# day 100 ==============================================================
clone.freq.d100 <- clone.df.pheno %>%
filter(TRB_CDR3 %in% clones.of.interest) %>%
mutate(freq = (TotalNumber/TotalNumberClone)) %>%
dplyr::select(sort, dpi, TRB_CDR3, freq, CloneFreq) %>%
filter(dpi == "d100")
wide <- clone.freq.d100 %>%
pivot_wider(names_from = sort, values_from = freq) %>%
mutate(across(everything(), ~ ifelse(is.na(.), 0, .))) %>%
filter(TRB_CDR3 %in% clones.of.interest) %>%
separate(TRB_CDR3, into = c("Group", "TRB_CDR3"))
kmeans <- wide %>% select(-c(dpi, TRB_CDR3, Group, CloneFreq, Group)) %>% kmeans(centers = 7)
wide$Cluster <- kmeans$cluster
wide %>% ungroup() %>% select(-c(dpi, TRB_CDR3,Group, CloneFreq, Cluster)) %>%
Heatmap(split = wide$Cluster, col = brewer.pal(9, "Blues"), border = T, column_title = "Day 100+", name = "freq of clone")
## Warning: The input is a data frame-like object, convert it to a matrix.
#### all Timpoints =============================================
all.tp.wide <- rbind(clone.freq.d21, clone.freq.d35) %>%
rbind(clone.freq.d100) %>%
mutate(sort = paste(dpi,sort, sep = "___")) %>%
select(-c(CloneFreq, dpi)) %>%
pivot_wider(names_from = sort, values_from = freq) %>%
mutate(across(everything(), ~ ifelse(is.na(.), 0, .)))
# heatmap of clones across TP
all.tp.wide %>% ungroup() %>% select(-c(TRB_CDR3)) %>%
Heatmap(col = brewer.pal(9, "Blues"), border = T, cluster_columns = F, show_row_dend = F, name = "Freq of clone at timepoint")
## Warning: The input is a data frame-like object, convert it to a matrix.
# check to make sure correlation between subsets
all.tp.wide %>% ungroup() %>% select(-c(TRB_CDR3)) %>% cor() %>% Heatmap(name = "Pearson R", column_title = "Correlation of Clone Freq", col = brewer.pal(9, "RdYlBu") %>% rev, border = T)
# clone frequency - clone and phenotype as proportion of all cells at that timepoint
clone.freq.longitudinal <- clone.df.pheno %>%
mutate(freq = (TotalNumber/TotalNumberClone)*CloneFreq) %>% dplyr::select(sort, dpi, TRB_CDR3, freq, CloneFreq, CloneFreq) %>%
mutate(name = paste(dpi, sort, sep = "--")) %>%
dplyr::select(TRB_CDR3, name, freq)%>% pivot_wider(values_from = freq) %>%
mutate(across(everything(), ~ ifelse(is.na(.), 0, .))) %>%
filter(TRB_CDR3 %in% clones.of.interest)
head(clone.freq.longitudinal)
## # A tibble: 6 × 12
## TRB_CDR3 `d100--Tcm` `d100--Tem_2` `d100--Tem_1` `d100--Tex-Prog`
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 aPDL1_GCCAGCAATAGTCA… 0.000181 0 0.0000605 0
## 2 aPDL1_GCCAGCACACAGGG… 0.0000320 0 0.00000962 0
## 3 aPDL1_GCCAGCACCATAGA… 0.00114 0.00151 0.00209 0.000143
## 4 aPDL1_GCCAGCAGACATGC… 0.000153 0.000225 0.0000177 0.00000846
## 5 aPDL1_GCCAGCAGATATGG… 0.000216 0.000150 0.0000466 0.000102
## 6 aPDL1_GCCAGCAGATCCGG… 0.000674 0.0000581 0.000165 0.0000367
## # ℹ 7 more variables: `d100--Tex-Term` <dbl>, `d21--Tex-Int` <dbl>,
## # `d21--Tex-KLR` <dbl>, `d21--Tex-Prog` <dbl>, `d35--Tex-Int` <dbl>,
## # `d35--Tex-KLR` <dbl>, `d35--Tex-Prog` <dbl>
# Transpose the desired data for correlation
df_transposed <- dplyr::select(clone.freq.longitudinal, -TRB_CDR3) %>% t
# Calculate the correlation matrix
correlation_matrix <- cor(df_transposed, use = "pairwise.complete.obs", method = "pearson")
# Convert correlation to distance matrix and perform hierarchical clustering
dist_matrix <- as.dist(1 - correlation_matrix)
hc <- hclust(dist_matrix, method = "complete")
plot(hc) # Plot dendrogram
# Cut tree into k clusters
set.seed(23)
clusters <- cutree(hc, k=8)
# add behavior info to clone.freq.longitudinal
clone.freq.longitudinal$Behavior <- paste0("T_" , clusters)
# # rename behaviors - group by pattern of expansion or contraction
clone.freq.longitudinal <- clone.freq.longitudinal %>% mutate(Behavior = case_when(
Behavior == "T_1" ~ "Traj_1", # contracting
Behavior == "T_5" ~ "Traj_2",
Behavior == "T_8" ~ "Traj_3",
Behavior == "T_7" ~ "Traj_4",
Behavior == "T_4" ~ "Traj_5", # expand then contract
Behavior == "T_6" ~ "Traj_6",
Behavior == "T_2" ~ "Traj_7", # expand whole time
Behavior == "T_3" ~ "Traj_8"
))
# mutate metadata of clone.freq.longitudinal
clone.freq.longitudinal <- clone.freq.longitudinal %>% separate(TRB_CDR3, into = c("group", "CDR3")) %>%
mutate(group = ifelse(group == "Ctrl" , "PBS", "aPDL1")) %>%
mutate(group = factor(group, levels = c("PBS", "aPDL1")))
# plot heatmap with clusters
hm <- Heatmap(correlation_matrix,
name = "Pearson R",
column_title = "Pairwise Clone Correlations",
# col = viridis(9),
col = brewer.pal(9, "RdBu") %>% rev(),
show_row_names = FALSE,show_column_names = FALSE,
cluster_rows = TRUE, cluster_columns = TRUE, show_row_dend = F, show_column_dend = F,
left_annotation = rowAnnotation(` ` = clone.freq.longitudinal$Behavior, show_legend = F, col = list (` ` = traj_pal)),
top_annotation = columnAnnotation(` ` = clone.freq.longitudinal$Behavior, show_legend = F, col = list (` ` = traj_pal)),
column_split = clone.freq.longitudinal$Behavior, row_split = clone.freq.longitudinal$Behavior,
row_title_rot = 0,
border = FALSE, row_gap = unit(0, "cm"), column_gap = unit(0, "cm"),
height = unit(3, "in"), width = unit(3, "in")
# use_raster = T
)
hm
# freq of clone behaviors per group
clone.freq.longitudinal %>%
count(Behavior, group) %>%
group_by(group) %>% mutate(n = n/sum(n)) %>%
ggplot(aes(x = group, y = n*100, fill = Behavior)) +
geom_col(show.legend = T) +
plot.theme +
scale_y_continuous(expand = expansion(mult = c(0.0, 0.05))) +
labs(x = "Group", y = "% of Clones") +
scale_fill_manual(values = traj_pal) + rotate_x_text(45)
# summary plots for clone behaviors
data_long <- clone.freq.longitudinal %>%
pivot_longer(cols = -c(group, CDR3, Behavior),
names_to = c("Day", "Sort"),
names_sep = "--",
values_to = "Value")
# summarize behavior per timepoint
data_summary <- data_long %>%
group_by(Behavior, Day, Sort) %>%
summarise(Average_Value = mean(Value, na.rm = TRUE)) %>%
ungroup()
## `summarise()` has grouped output by 'Behavior', 'Day'. You can override using
## the `.groups` argument.
data_summary %>% group_by(Day, Behavior) %>% mutate(n = Average_Value/sum(Average_Value)) %>% select(-Average_Value) %>%pivot_wider(names_from = Sort, values_from = n)
## # A tibble: 24 × 9
## # Groups: Day, Behavior [24]
## Behavior Day Tcm Tem_1 Tem_2 `Tex-Prog` `Tex-Term` `Tex-Int`
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Traj_1 d100 0.317 0.502 0.152 0.0239 0.00501 NA
## 2 Traj_1 d21 NA NA NA 0.0603 NA 0.207
## 3 Traj_1 d35 NA NA NA 0.0877 NA 0.377
## 4 Traj_2 d100 0.212 0.449 0.234 0.0507 0.0537 NA
## 5 Traj_2 d21 NA NA NA 0.0228 NA 0.919
## 6 Traj_2 d35 NA NA NA 0.0850 NA 0.516
## 7 Traj_3 d100 0.716 0.160 0.0893 0.0238 0.0112 NA
## 8 Traj_3 d21 NA NA NA 0.255 NA 0.639
## 9 Traj_3 d35 NA NA NA 0.305 NA 0.354
## 10 Traj_4 d100 0.580 0.165 0.0834 0.114 0.0574 NA
## # ℹ 14 more rows
## # ℹ 1 more variable: `Tex-KLR` <dbl>
data_summary %>% group_by(Day, Behavior) %>% pivot_wider(names_from = Sort, values_from = Average_Value)
## # A tibble: 24 × 9
## # Groups: Day, Behavior [24]
## Behavior Day Tcm Tem_1 Tem_2 `Tex-Prog` `Tex-Term` `Tex-Int`
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Traj_1 d100 0.0000799 0.000126 3.83e-5 0.00000603 1.26e-6 NA
## 2 Traj_1 d21 NA NA NA 0.0000842 NA 0.000290
## 3 Traj_1 d35 NA NA NA 0.0000368 NA 0.000158
## 4 Traj_2 d100 0.0000345 0.0000731 3.81e-5 0.00000825 8.73e-6 NA
## 5 Traj_2 d21 NA NA NA 0.0000137 NA 0.000552
## 6 Traj_2 d35 NA NA NA 0.0000261 NA 0.000159
## 7 Traj_3 d100 0.000197 0.0000442 2.47e-5 0.00000656 3.08e-6 NA
## 8 Traj_3 d21 NA NA NA 0.000195 NA 0.000490
## 9 Traj_3 d35 NA NA NA 0.000109 NA 0.000127
## 10 Traj_4 d100 0.0000609 0.0000173 8.76e-6 0.0000120 6.03e-6 NA
## # ℹ 14 more rows
## # ℹ 1 more variable: `Tex-KLR` <dbl>
# plot phenotypes per trajectory
plot.pheno <- data_summary %>%
mutate(Day = case_when(
Day == "d21" ~ "21 dpi",
Day == "d35" ~ "35 dpi",
Day == "d100" ~ "100+ dpi")) %>%
ggplot(aes(x = Day, y = Average_Value, fill = Sort, group = Sort)) +
geom_col() +
facet_wrap(~Behavior, ncol = 4, scales = "free") +
scale_x_discrete(limits = c("21 dpi", "35 dpi", "100+ dpi")) +
scale_fill_manual(values = pop.pal) +
plot.theme + rotate_x_text(45) + labs(x = "Timepoint", y = "Freq of Cells (Avg. per Clone)") +
scale_y_continuous(expand = expansion(mult = c(0.0, 0.05)), labels = function(x) format(x, scientific = TRUE)) +
theme(legend.position = "none")
plot.pheno
# new dataframe to hold longitudinal data
df_totalfreq <- clone.freq.longitudinal %>%
rowwise() %>%
mutate(
d21Prop = sum(c_across(starts_with("d21"))),
d35Prop = sum(c_across(starts_with("d35"))),
d100Prop = sum(c_across(starts_with("d100")))
) %>%
ungroup()
# Step 2: Reshape data from wide to long
df_long <- df_totalfreq %>%
select(group, CDR3, Behavior, d21Prop, d35Prop, d100Prop) %>%
pivot_longer(
cols = c(d21Prop, d35Prop, d100Prop),
names_to = "TimePoint",
values_to = "Proportion"
) %>%
mutate(TimePoint = factor(TimePoint, levels = c("d21Prop", "d35Prop", "d100Prop")))
# plot all clones sizes
ggplot(data = df_long) +
# Add individual lines with low alpha
geom_line(aes(x = TimePoint, y = Proportion, group = interaction(CDR3)), alpha = 0.1) +
facet_wrap(~ Behavior, ncol = 4) +
labs(x = "Timepoint", y = "Freq of Cells (Log Scale)") +
scale_x_discrete(labels = c("21 dpi", "35 dpi", "100+ dpi")) +
theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
scale_color_manual(values = c(group.pal), name = "Group") +
scale_y_log10()
## Warning: Transformation introduced infinite values in continuous y-axis
# plot average clone size per group clone sizes
df_avg <- df_long %>%
group_by(TimePoint, Behavior) %>%
summarise(AvgProportion = mean(Proportion), sem = sd(Proportion)/sqrt(length(Proportion))) %>%
mutate(trend = ifelse(Behavior %in% c(paste0("Traj_", 1:4)),"Contracting", "Expanding"))
## `summarise()` has grouped output by 'TimePoint'. You can override using the
## `.groups` argument.
df_avg %>%
ggplot(aes(x = TimePoint, color = Behavior, y = AvgProportion)) +
geom_errorbar(aes(ymin = AvgProportion - sem, ymax = AvgProportion + sem), width = 0.05) +
geom_line(aes(group = Behavior)) +
geom_point(size = 1.5) +
facet_wrap(~trend, scales = "free") +
ylim(0,0.0016) +
plot.theme +
scale_x_discrete(labels = c("21 dpi", "35 dpi", "100+ dpi")) + labs(x = "Timepoint", y = "Freq of Cells", color = "Trajectory") +
scale_color_manual(values = brewer.pal(10, "Paired")) + rotate_x_text(45) +
theme(legend.position = "right")
# create new dataframe that shows intracelonal phenotype per timepont
all.tp.withclusters <- merge(x = clone.freq.longitudinal %>% select(group, CDR3, Behavior),
y = all.tp.wide %>% separate(TRB_CDR3, into = c("group", "CDR3")) %>% # from above chunk looking at individual TP
mutate(group = ifelse(group == "Ctrl" , "PBS", "aPDL1")) %>%
mutate(group = factor(group, levels = c("PBS", "aPDL1"))))
# select only 100+ dpi phenotype
d100.clones <- all.tp.withclusters %>% select(group, CDR3, Behavior, d100___Tcm, d100___Tem_2, d100___Tem_1, `d100___Tex-Prog` , `d100___Tex-Term`) %>%
rename(Long_Traj = Behavior,
Tcm = d100___Tcm,
Tem_2 = d100___Tem_2,
Tem_1 = d100___Tem_1,
`Tex-Prog` = `d100___Tex-Prog` ,
`Tex-Term` = `d100___Tex-Term`)
# look at which clones form what at d100+
d100.clones %>% pivot_longer(-c(group, CDR3,Long_Traj)) %>%
group_by(name, Long_Traj, group) %>% mutate(freqency = mean(value), nclones = length(CDR3)) %>%
ggplot(aes(x = name, y = Long_Traj, size = nclones, color = freqency)) + geom_point() + facet_wrap(~group) +
plot.theme.box + rotate_x_text(45) + scale_color_distiller(palette = "Reds", direction = "rev")
d100.clones %>% select(-c(CDR3, group, Long_Traj)) %>%
Heatmap(row_split = d100.clones$Long_Traj,
col = brewer.pal(9, "Purples"), border = T, row_title_rot = 0, name = "% of clone")
## Warning: The input is a data frame-like object, convert it to a matrix.
# Function to calculate fate bias significance by hypergeometric test
calculate_fate_bias_significance <- function(M, m, N, n) {
# Perform one-sided hypergeometric test
p_value <- phyper(m - 1, n, N - n, M, lower.tail = FALSE)
# Example usage:
# M: total cells in differentiated states for this clone
# m: cells in the fate cluster of interest for this clone
# N: total cells in differentiated states across all clones
# n: total cells in the fate cluster of interest across all clones
return(p_value)
}
# get data from each clone - add proper values for M m N n
bias.df <-
clone.freq.longitudinal %>% pivot_longer(cols = -c(group,CDR3, Behavior), names_to = "Pop", values_to = "n") %>%
filter(grepl("d100", Pop)) %>%
mutate(n = n*1e6) %>%
group_by(Pop) %>% mutate(Total_Count_Sort = sum(n)) %>% ungroup() %>% # add total cells per sort across clones (n)
group_by(group,CDR3) %>% mutate(Total_Count_Clone = sum(n)) %>% ungroup() %>% # add total cells per clone (M)
mutate(Total_Count_Total = sum(n)) %>% # add the total number of cells (N)
dplyr::rename(N = Total_Count_Total, M = Total_Count_Clone, n = Total_Count_Sort, m = n)
# calulte one-sided hypergeometric test
bias.df$p_value <- mapply(calculate_fate_bias_significance, bias.df$M, bias.df$m, bias.df$N, bias.df$n)
bias.df$p_value_adj <- p.adjust(bias.df$p_value , method = "hochberg")
# look on a heatmap & compare phenotype prop vs. hypergeometic
hm.prop <- bias.df %>%
pivot_wider(names_from = Pop, values_from = p_value_adj, id_cols = c(group, CDR3, Behavior), values_fill = 1) %>%
merge(d100.clones) %>%
select("d100--Tcm", "d100--Tem_2", "d100--Tem_1", "d100--Tex-Prog", "d100--Tex-Term") %>% Heatmap(name = "P adj", col = viridis(9), border = T)
## Warning: The input is a data frame-like object, convert it to a matrix.
hm.hyper <- bias.df %>%
pivot_wider(names_from = Pop, values_from = p_value_adj, id_cols = c(group, CDR3, Behavior), values_fill = 1) %>%
merge(d100.clones) %>%
select("Tcm", "Tem_2", "Tem_1", "Tex-Prog", "Tex-Term") %>% Heatmap(name = "Fraction of Clone", col = brewer.pal(9, "Blues"), border = T)
## Warning: The input is a data frame-like object, convert it to a matrix.
hm.hyper + hm.prop
# select lowest P vlaue per clone and say a clone is biased towards a phenotype based on lowest P value that is sig
bias.wide <- bias.df %>%
pivot_wider(names_from = Pop, values_from = p_value_adj, id_cols = c(group, CDR3, Behavior), values_fill = 1)
bias.wide <- bias.wide %>% select(c("d100--Tcm" ,"d100--Tem_2", "d100--Tem_1","d100--Tex-Prog", "d100--Tex-Term")) %>%
rowwise() %>% # Enable row-wise operations
mutate(
pval = min(c_across(contains("d100")), na.rm = TRUE), # Find the minimum value across "d100" columns
min_p = names(.)[which.min(c_across(contains("d100")))] # Find the name of the column with the minimum value
) %>%
mutate(min_p = ifelse(pval < 0.05, min_p, "ns")) %>%
cbind(bias.wide %>% select(group, CDR3, Behavior)) %>%
mutate(min_p = case_when(
min_p == "d100--Tcm" ~ "Tcm",
min_p == "d100--Tem_2" ~ "Tem_2",
min_p == "d100--Tem_1" ~ "Tem_1",
min_p == "d100--Tex-Prog" ~ "Tex-Prog",
min_p == "d100--Tex-Term" ~ "Tex-Term",
min_p == "ns" ~ "No Bias")
)
# plot clones based on hypergeometic enrichment and longitudinal trajectory
bias.wide %>% count(group, Behavior, min_p) %>%
group_by(min_p) %>% mutate(order = sum(n)) %>%
ggplot(aes(x = fct_reorder(min_p , order), y = n, fill=Behavior)) +
geom_col(show.legend = F) +
facet_wrap(~group, scales = "free") +
plot.theme + rotate_x_text(45) +
scale_fill_manual(values = traj_pal) +
labs(x = "Clonal Bias 100+ dpi", y = "# of clones") +
scale_y_continuous(expand = expansion(mult = c(0.0, 0.05)))
# set up dataframe logistic regression ===============================
regress.df <- clone.freq.longitudinal %>%
mutate(group = ifelse(group == "PBS", 0, 1)) # set group = 1 if clone is in the aPDL1 group
# Run logistic regression model with Clone_Behavior as predictor
model <- glm(group ~ Behavior, data = regress.df, family = binomial)
summary(model)
##
## Call:
## glm(formula = group ~ Behavior, family = binomial, data = regress.df)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.02553 0.09226 0.277 0.78197
## BehaviorTraj_2 -0.47612 0.14528 -3.277 0.00105 **
## BehaviorTraj_3 0.72824 0.43856 1.661 0.09681 .
## BehaviorTraj_4 -0.21742 0.20884 -1.041 0.29784
## BehaviorTraj_5 0.22842 0.12063 1.894 0.05828 .
## BehaviorTraj_6 -0.94870 0.23810 -3.984 6.77e-05 ***
## BehaviorTraj_7 1.37033 0.23769 5.765 8.16e-09 ***
## BehaviorTraj_8 0.37993 0.31797 1.195 0.23214
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 2625.2 on 1894 degrees of freedom
## Residual deviance: 2522.8 on 1887 degrees of freedom
## AIC: 2538.8
##
## Number of Fisher Scoring iterations: 4
# Create new data frame with unique values of Behaviors and Generate predictions based on Behavior
new_data <- data.frame(Behavior = unique(regress.df$Behavior))
predictions <- predict(model, new_data, type = "response", se.fit = TRUE)
predictions
## $fit
## 1 2 3 4 5 6 7 8
## 0.5063830 0.8015267 0.6000000 0.5631501 0.3892216 0.2843137 0.4521739 0.6800000
##
## $se.fit
## 1 2 3 4 5 6 7
## 0.02306140 0.03484771 0.07302967 0.01911924 0.02667885 0.04466423 0.04641146
## 8
## 0.09329520
##
## $residual.scale
## [1] 1
# Store predictions and standard errors in the new data frame
new_data$fit <- predictions$fit
new_data$se.fit <- predictions$se.fit
# plot comparisons
ggplot(data = new_data, aes(x = fct_reorder(Behavior, fit), y = fit, color = Behavior)) +
geom_hline(yintercept = 0.5, linetype = "dashed", color = "grey80") +
geom_point(size = 2.5) +
geom_errorbar(aes(ymin = fit - 1.96 * se.fit, ymax = fit + 1.96 * se.fit, width = 0.2), width = 0.25) + # 95% confidence interval
plot.theme +
# ylim(NA, max_y * 1.4) +
ylab("Predicted Enrichment in aPDL1 vs Ctrl") +
xlab("Longitundinal Clone Trajectory") +
theme(legend.position = "none") +
facet_wrap(~" ") +
scale_y_continuous(limits = c(0,1.01), breaks = c(0,0.5,1)) +
scale_color_manual(values = brewer.pal(10, "Paired")) + rotate_x_text(45)
clones.d7 <- clone.df %>% filter(Sample %in% c("C103_GP33neg", "C103_GP33pos"))
clone.freq.longitudinal %>% mutate(early = ifelse(CDR3 %in% clones.d7$TRB_CDR3, "early", "novel")) %>%
count(group, Behavior, early) %>%
group_by(Behavior, group) %>% mutate(n=n/sum(n)) %>%
ggplot(aes(x = Behavior, y = n, fill = early)) + geom_col() + facet_wrap(~group) + rotate_x_text()
clone.freq.longitudinal %>% mutate(early = ifelse(CDR3 %in% clones.d7$TRB_CDR3, "early", "novel")) %>%
count(group, Behavior, early) %>%
# group_by(Behavior, group) %>% mutate(n=n/sum(n)) %>%
ggplot(aes(x = Behavior, y = n, fill = early)) + geom_col() + facet_wrap(~group) + rotate_x_text()